## Code to examine how voter turnout is affected by job loss, in the short and long run. 
## These are the results presented in Figure 4 in the main text. 
## Last changed: 2025-05-23 (KOL)

sink("C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Output/Figure4.txt", split=TRUE)

library(PSweight)
library(cobalt)
library(MatchIt)
library(distances)
library(moreparty)
library(party)
library(caret)
library(bonsai)
library(randomForest)
library(tidymodels)
library(tidyverse)
library(rmarkdown)
library(tinytex)
library(tibble)
library(knitr)
library(car)
library(fst)
library(stargazer)
library(pryr)
library(stringr)
library(fst)
library(lubridate)
library(data.table)
library(summarytools)
library(ggplot2)
library(cowplot)
library(ggpubr)
library(haven)
library(broom)
library(fixest)
library(modelsummary)
library(gt)
library(MASS)
library(survey)
library(collapse)
library(readxl)
library(Hmisc)
library(DescTools)
library(ranger)
library(stablelearner)
library(future.apply)
library(lme4)
library(partykit)
library(ggiplot)


# Specify the name of the treatment variable
  tvar <- "T94_90"

# Specify the the birth cohorts to be used for the analysis
  lbyr <- 1942
  ubyr <- 1967

# Specify if only include Swedish citizens 
  citizen <- 1

# Specify the propensity score specification
  psformula <- formula(T94 ~
                       Kon + DAStod_1982 + DAStod_1983 + DAStod_1984 +
                       DAStod_1985 + DAStod_1986 + DAStod_1987 + DAStod_1988 +
                       DAStod_1989 +
                       DAntBarn_91 + Sambo_91 + InvBak |
                       UtbAr_91 + SSYK3_90 + SNI2_91 + Kommun_91 +
                       rCARB_1982 + rCARB_1983 + rCARB_1984 + rCARB_1985 +
                       rCARB_1986 + rCARB_1987 + rCARB_1988 + rCARB_1989 +
                       pArbInk_1990 + pArbInk_1991 +
                       FodAr + Rost82)

# Create an object in which to save the results.
  Models <-  vector('list', 5)

  j <- 1

## Loop over the SES quartiles, i=5 gives the results for the pooled sample.
for(i in 1:5) { 
  # Read in the data sets
  NomVald <- read_fst("E:/ProjData/Jan_Kalle/JoPReplications/DB_NomVald.fst", as.data.table=T)
  RTB <- read_fst("E:/ProjData/Jan_Kalle/JoPReplications/DB_RTB.fst", as.data.table=T)
  db <- read_fst("E:/ProjData/Jan_Kalle/JoPReplications/DB_Nom.fst", as.data.table=T)
  
  db[, T94 := get(tvar)]
  mdb <- db[between(FodAr, lbyr, ubyr) & (SvMedb82_18 >= citizen),]
  rm(db)
  gc()
  
  # Read in data on voter turnout and merge them onto the main data set.
  VD82  <- read.fst("D:/SCB_ConPol/Fst/VD/Valdelt_82.fst", as.data.table = T)
  VD82 <- VD82[, cID := .N, by = LopNr][cID == 1 & !is.na(LopNr), ][, cID := NULL]
  VD94r <- read.fst("D:/SCB_ConPol/Fst/VD/Valdelt_1994rkl.fst", as.data.table = T)
  VD94r <- VD94r[, cID := .N, by = LopNr][cID == 1 & !is.na(LopNr), ][, cID := NULL]
  VD10  <- read.fst("D:/SCB_ConPol/Fst/VD/Valdelt_2010.fst", as.data.table = T)
  VD10 <- VD10[, cID := .N, by = LopNr][cID == 1 & !is.na(LopNr), ][, cID := NULL]
  VD18  <- read.fst("D:/SCB_ConPol/Fst/VD/Valdelt_2018.fst", as.data.table = T)
  VD18 <- VD18[, cID := .N, by = LopNr][cID == 1 & !is.na(LopNr), ][, cID := NULL]
  VD22  <- read.fst("D:/SCB_ConPol/Fst/VD/Valdelt_2022.fst", as.data.table = T)
  VD22 <- VD22[, cID := .N, by = LopNr][cID == 1 & !is.na(LopNr), ][, cID := NULL]
  
  VD82[r != 3 & !is.na(r), RRost := as.numeric(r %in% c(2, 4, 5, 6))]
  VD82[, Ar := 1982]
  VD94r[Rostratt == 1, RRost := as.numeric(r %in% c(2, 4, 5, 6))]
  VD94r[, Ar := 1994]
  VD10[Rostratt == 1, RRost := as.numeric(r %in% c(2, 4, 5, 6))]
  VD10[, Ar := 2010]
  VD18[Rostratt == 1, RRost := as.numeric(Rrost)]
  VD18[, Ar := 2018]
  VD22[Rostratt == 1, RRost := as.numeric(RD)]
  VD22[, Ar := 2022]
  VD <- rbindlist(list(VD82[, .(LopNr, RRost, Ar)], VD94r[, .(LopNr, RRost, Ar)],
                       VD10[, .(LopNr, RRost, Ar)], VD18[, .(LopNr, RRost, Ar)], VD22[, .(LopNr, RRost, Ar)]))
  
  VD[!is.na(RRost) & Ar>1982, AntVal := .N, by= LopNr]
  VD[, .N, by=AntVal]
  
  # Extract data on labor earnings for the 1980s
  for(k in 1982:1989) {
    mdb[get(paste0("CARB_", k)) > 0, paste0("rCARB_", k) := cut_number(get(paste0("CARB_", k)), 100, labels = FALSE)]
    mdb[is.na(get(paste0("rCARB_", k))), paste0("rCARB_", k) := 0]
  }
  
  antVD <- unique(VD[!is.na(AntVal), .(LopNr, AntVal)])
  
  setnames(VD82, "RRost", "Rost82")
  mdb <- VD82[, .(LopNr, Rost82)][mdb, on = "LopNr"]
  mdb[FodAr>1964, Rost82 := -99]
  mdb[FodAr==1964 & FodMan>8, Rost82 := -99]
  mdb[is.na(Rost82), Rost82:=-99]
   
  mdb <- antVD[mdb, on = "LopNr"]
  mdb[is.na(AntVal), AntVal := 0]
  mdb <- mdb[AntVal==4,]
  
  rm(VD82, VD94r, VD10, VD18)
  gc()
  
  # Split the data in SES quartiles
  mdb[!is.na(avSES3), ravSES := cut_number(avSES3, 4, labels = FALSE)]
  
  if(i==1) mdb <- mdb[between(ravSES, 1, 1)]
  if(i==2) mdb <- mdb[between(ravSES, 2, 2)]
  if(i==3) mdb <- mdb[between(ravSES, 3, 3)]
  if(i==4) mdb <- mdb[between(ravSES, 4, 4)]
  
  print(qsu(mdb[, .(avSES, UtbAr_91, FodAr)]))
  print(qsu(mdb$FodAr))
  
  # Use logit model to estimate propensity scores
  mdb <- mdb[complete.cases(mdb),]
  tm <- proc.time()
  logmod <- feglm(psformula,  
                  data = mdb, family="binomial")
  proc.time()-tm
  print(summary(logmod))
  
  # Extract predicted probability (propensity scores)
  if(logmod$nobs == logmod$nobs_origin){ 
    mdb[, pT94 := predict(logmod, type = "response")]
  }else{
    mdb[logmod$obs_selection$obsRemoved, pT94 := predict(logmod, type = "response")]
  }
  
  mdb <- mdb[complete.cases(mdb),]
  print(qsu(mdb[, .(FodAr, UtbAr_91)]))
  
  # Now use the Full matching approach of Sävje et al. to obtain ATT and ATE weights.  
  
  tmp <- proc.time()
  set.seed(7892)
  m.out <- matchit(T94~1, data = mdb,
                   distance = mdb$pT94, method = "quick", estimand = "ATT")
  proc.time()-tmp
  m.data1 <- as.data.table(match.data(m.out))[, .(LopNr, weights)]
  setnames(m.data1, "weights", "gm_att")
  
  # Extract the matching weights and merge them onto the main data set.
  mdb <- m.data1[, .(LopNr, gm_att)][mdb, on = "LopNr"]
  gc()
  RTB <- mdb[RTB, on = "LopNr"]
  
  rm(mdb)
  gc()
  
  # Remove R objects that are not needed.
  rm(list=setdiff(ls(), c("NomVald", "RTB", "rtbvar", "Models", "Models2", "j", "tvar", "lbyr", "ubyr", "citizen", "psformula", "VD")))
  gc()
  
  mdb <- RTB[!is.na(gm_att), 
             .(Kon, Nom82, Nom85, Nom88, Nom91, Sysselsatt_85, DAntBarn_91, Sambo_91, 
               DStud_91, DForLed_91, DSjukRe_91, DSocBidrFam_91, DBostBidrFam_91,
               UtbAr_91, FodAr, InvBak, isei, Yrke80_90, SNI2_90, Kommun_91, fp_LoneInk_85, fp_LoneInk_90, fp_LoneInk_91,
               gm_att, Nom, LopNr, T94, Ar, pT94)]
  
  gc()
  
  mdb <- VD[mdb, on = c("LopNr", "Ar")]
  
  # Recode the dependent variable to take on the values of 0 and 100, rather than 0 and 1.
  mdb[, RRost := RRost*100]
  
  rm(RTB)
  gc()
  
# Define an indicator taking on the value of 1 for all post-treatment years.  
  mdb[, Post := as.numeric(Ar>1991)]
  
# Estimate the short-run effects on turnout, using our matching weights. These results are used in Figure 4.  
  Models[[j]] <- feols(RRost~T94+i(Post, T94, ref=0) | Ar, cluster ="LopNr", 
                       weights = mdb[Ar %in% c(1982, 1994), gm_att], 
                       mdb[Ar %in% c(1982, 1994)])
  
  # Estimate the long-run effects on turnout, using our matching weights. These results are used in Figure 4.
  Models[[j+1]] <- feols(RRost~T94+i(Post, T94, ref=0) | Ar, cluster ="LopNr", 
                         weights = mdb[Ar %in% c(1982, 2010, 2018, 2022), gm_att], 
                         mdb[Ar %in% c(1982, 2010, 2018, 2022)])
  
  print(summary(Models[[j]]))
  print(summary(Models[[j+1]]))
  print(qsu(mdb[Ar == 1982, RRost]))
  j <- j+2
}  


# Now assemble and arrange the regression results to produce Figure 4.

  Models_short <- Models[c(1, 3, 5, 7, 9)]
  Models_long <- Models[c(2, 4, 6, 8, 10)]

  b1_sh <- coef(Models_short[[1]])["Post::1:T94"]
  b2_sh <- coef(Models_short[[2]])["Post::1:T94"]
  b3_sh <- coef(Models_short[[3]])["Post::1:T94"]
  b4_sh <- coef(Models_short[[4]])["Post::1:T94"]
  b5_sh <- coef(Models_short[[5]])["Post::1:T94"]
  
  se1_sh <- se(Models_short[[1]])["Post::1:T94"]
  se2_sh <- se(Models_short[[2]])["Post::1:T94"]
  se3_sh <- se(Models_short[[3]])["Post::1:T94"]
  se4_sh <- se(Models_short[[4]])["Post::1:T94"]
  se5_sh <- se(Models_short[[5]])["Post::1:T94"]
  
  b_sh <- c(b1_sh, b2_sh, b3_sh, b4_sh, b5_sh)
  se_sh <- c(se1_sh, se2_sh, se3_sh, se4_sh, se5_sh)
  
  b1_lo <- coef(Models_long[[1]])["Post::1:T94"]
  b2_lo <- coef(Models_long[[2]])["Post::1:T94"]
  b3_lo <- coef(Models_long[[3]])["Post::1:T94"]
  b4_lo <- coef(Models_long[[4]])["Post::1:T94"]
  b5_lo <- coef(Models_long[[5]])["Post::1:T94"]
  
  se1_lo <- se(Models_long[[1]])["Post::1:T94"]
  se2_lo <- se(Models_long[[2]])["Post::1:T94"]
  se3_lo <- se(Models_long[[3]])["Post::1:T94"]
  se4_lo <- se(Models_long[[4]])["Post::1:T94"]
  se5_lo <- se(Models_long[[5]])["Post::1:T94"]
  
  b_lo <- c(b1_lo, b2_lo, b3_lo, b4_lo, b5_lo)
  se_lo <- c(se1_lo, se2_lo, se3_lo, se4_lo, se5_lo)
  
  ses <- c("Q1", "Q2", "Q3", "Q4", "All")
  
  db_sh <- data.table(b=b_sh, se=se_sh, ses=ses, tid="sh")
  db_lo <- data.table(b=b_lo, se=se_lo, ses=ses, tid="lo")
  db <- rbindlist(list(db_sh, db_lo))
  db
  
  db[ses == 1, adj := -.25/2 ]
  db[ses == 2, adj := 0/2 ]
  db[ses == 3, adj := .25/2 ]
  db[ses == 4, adj := .5/2 ]
  db[ses == 5, adj := .75/2 ]
  db[, ci_low := b-1.96*se]
  db[, ci_high := b+1.96*se]
  
  g0 <-  ggplot(db[tid=="sh",], aes(ses, b)) +
    geom_point()+
    geom_hline(yintercept=0, linetype="longdash") +
    geom_errorbar(aes(ymin=ci_low,ymax=ci_high), width=0.1, size=.6)+
    scale_y_continuous(limits = c(-3.5, .5), breaks = round(seq(-3.5, .5, by = .5), digits=1)) +
    xlab("SES Group") +
    ylab("Effect on Turnout (pp.)") +
    theme_classic(base_size = 15) +
    theme(legend.title=element_blank()) +
    ggtitle("Short-Run Effects (1994)")+
    theme(legend.position = "bottom", panel.grid.major.y = element_line(color = "grey95"))
  g0 
  
  g1 <-  ggplot(db[tid=="lo",], aes(ses, b)) +
    geom_point()+
    geom_hline(yintercept=0, linetype="longdash") +
    geom_errorbar(aes(ymin=ci_low,ymax=ci_high), width=0.1, size=.6)+
    scale_y_continuous(limits = c(-3.5, .5), breaks = round(seq(-3.5, .5, by = .5), digits=1)) +
    theme_classic(base_size = 15) +
    theme(legend.title=element_blank()) +
    xlab("SES Group") +
    ylab("Effect on Turnout (pp.)") +
    ggtitle("Long-Run Effects (2010, 2018, 2022)")+
    theme(legend.position = "bottom", panel.grid.major.y = element_line(color = "grey95"))
  g1 
  
  ggarrange(g0, g1)
  
  ggsave("C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Figures/Figure4.pdf", width = 15, height =7.5, units = "in") 
  ggsave("C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Figures/Figure4.eps", width = 15, height =7.5, units = "in")
  
  modelsummary(Models_short, output = "C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Tables/TableFig4_sh.tex")
  modelsummary(Models_long, output = "C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Tables/TableFig4_lo.tex")

  sink()